## ---------------------------------------
## DESCRIPTIVES
## Replication file for Balcells, Laia and Francisco Villamil (2020)
## 'The double logic of internal purges: New evidence from Francoist Spain.'
## Nationalism and Ethnic Politics, 26(3) (forthcoming)
## Revised: July 2020
## ---------------------------------------

# Make sure the working directory is the replication folder. If it is not,
# ask the user to pick any file inside that folder and derive the folder
# from the chosen path.
if(!grepl("replication$", getwd())){
  print("Choose any file in the replication folder (e.g. analyses.R)")
  dir = file.choose()
  # On Windows the file dialog returns backslash-separated paths, which the
  # "/"-based pattern below would never match; convert the separators first
  if(.Platform$OS.type == "windows"){
    dir = chartr("\\", "/", dir)
  }
  # Fail with a clear message instead of letting setwd() choke on a file path
  if(!grepl("replication(/.*)$", dir)){
    stop("The selected file is not inside a 'replication' folder: ", dir)
  }
  # Drop everything after the folder name to obtain the folder itself
  setwd(gsub("replication(/.*)$", "replication", dir))
}

# Keep character columns as character when reading data (this is already the
# default from R 4.0 onwards; the option only matters on older versions)
options(stringsAsFactors = FALSE)

# Packages required by this script
pkg = c("ggplot2", "scales", "ggthemes", "xtable")

# Install whatever is missing, announcing it first
pkg_no = setdiff(pkg, rownames(installed.packages()))
if(length(pkg_no) > 0){
  warning(paste0("Installing: ", paste(pkg_no, collapse = ", ")), immediate. = TRUE)
  install.packages(pkg_no)
}

# Attach the packages. library() stops with an error if a package still
# cannot be loaded, whereas require() would only return FALSE and let the
# script fail later with a less helpful message. invisible() keeps the
# lapply() result from being printed.
invisible(lapply(pkg, library, character.only = TRUE))

## Loading data
# Read the replication dataset from the working directory
data = read.csv(file = "dataset.csv")

## Preparation
# Convert the BOP column to Date so it can be used on a date axis
data = transform(data, BOP = as.Date(BOP))

## Purges over time by province ##

# Create province label: province name with its first letter capitalised.
# substring(x, 2) keeps the whole remainder of the name, so names longer
# than 20 characters are not cut off.
data$prov_label = paste0(toupper(substr(data$provincia, 1, 1)),
  substring(data$provincia, 2))

# Build the plot: one histogram of BOP dates per province (60-day bins),
# all panels sharing the same date range, with a red line on 1939-04-01
bop_range = range(data$BOP, na.rm = TRUE)
time_plot = ggplot(data, aes(x = BOP)) +
  geom_histogram(binwidth = 60) +
  facet_wrap(~prov_label, ncol = 2, scales = "free") +
  scale_x_date(labels = date_format("%Y"), date_breaks = "1 year",
    limits = bop_range) +
  # Fixed parameter rather than an aesthetic: a constant inside aes() is
  # recycled to one (overlapping) line per data row
  geom_vline(xintercept = as.numeric(as.Date("1939-04-01")), color = "red") +
  theme_classic() +
  theme(
    legend.title = element_blank(),
    legend.background = element_blank(),
    panel.grid = element_blank(),
    panel.grid.major.y = element_line(size = 0.5, color = gray(0.95)),
    strip.text = element_text(size = 12),
    strip.background = element_blank()) +
  labs(x = "", y = "",
    subtitle = "(Red line indicates the end of the civil war on April 1, 1939)")

# Print plot
# print() is explicit because ggplot objects are only drawn automatically
# when evaluated at the top level of an interactive/Rscript session; under
# source() the PDF would otherwise be left empty
pdf("time_variation_by_prov.pdf", width = 7, height = 7)
print(time_plot)
dev.off()

## Descriptives details maestros ##

# Factor and change labels
# Note: missing gender is recoded in `data` itself, not only in the table
data$gender[is.na(data$gender)] = "Unknown"
gender = factor(data$gender)
# Capitalise the first letter of each level, keeping the rest in full
levels(gender) = paste0(toupper(substr(levels(gender), 1, 1)),
  substring(levels(gender), 2))
provincia = factor(data$prov_label)

# Outcome/charge indicators to summarise: table row label = column in `data`.
# Keeping label and column side by side prevents the two from drifting apart.
indicators = c(
  "Confirmation" = "confirmacion",
  "Relocation region" = "traslado_region",
  "Relocation province" = "traslado_prov",
  "Inhabilitation" = "inhabilitacion",
  "Any charges" = "cargos_any",
  "Militancy" = "A_militancia",
  "Nationalism" = "B_nacionalismo",
  "Attitudes against CN" = "C_actitudesCN",
  "Leftist" = "D_izquierdas")

# Share of observations coded "1", per level of `by` and overall.
#   x  : indicator vector (NAs are left out of the counts by table())
#   by : grouping factor of the same length as x
# Returns a character vector such as "12.3%", one element per level of `by`
# followed by one for the total. If no observation is coded "1" the share is
# reported as 0.0% rather than dropping the row (which would misalign the
# final table).
pct_coded_one = function(x, by){
  tab = table(factor(x), by)
  ones = if("1" %in% rownames(tab)) tab["1", ] else rep(0, ncol(tab))
  ones = c(ones, sum(ones))
  n = c(colSums(tab), sum(tab))
  paste0(sprintf("%01.1f", round(ones / n * 100, 1)), "%")
}

# Create outcomes/charges table as percentages, one row per indicator.
# The number of columns follows from the number of provinces instead of
# being hard-coded.
desc_by_prov = do.call(rbind,
  lapply(data[indicators], pct_coded_one, by = provincia))
rownames(desc_by_prov) = names(indicators)

# Create gender / province table with absolute numbers and totals.
# The total row/column are named directly; matching on an empty row name
# would also relabel a gender level that happens to be an empty string.
gender_prov = table(gender, provincia)
gender_prov = rbind(gender_prov, Total = colSums(gender_prov))
gender_prov = cbind(gender_prov, Total = rowSums(gender_prov))

# Join together (counts on top, percentages below; result is character)
desc_by_prov = rbind(gender_prov, desc_by_prov)

# Print table
# Explicit print() so the table is also shown when the script is source()d
print(desc_by_prov)
